
import os

#1 表示 启动 cuda模拟
os.environ["NUMBA_ENABLE_CUDASIM"] = "1";

from numba import cuda
import numpy as np
import math
from time import time


@cuda.jit
def gpu_add(a, b, result, n):
    """CUDA kernel: element-wise vector add, result[i] = a[i] + b[i].

    Each thread handles one element; threads whose global index falls
    past the first n elements do nothing (guard for the last block).
    """
    i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    if i >= n:
        return
    result[i] = a[i] + b[i]

def gpu(x, y, n):
    """Add vectors x and y on the (simulated) GPU and print the elapsed time.

    Copies x and y to the device, launches the gpu_add kernel over n
    elements, and synchronizes before stopping the clock (kernel launches
    are asynchronous). Returns the result copied back to the host as a
    NumPy array so callers can verify it — the original discarded it.

    NOTE(review): the timed region includes JIT compilation on the first
    call, so the first measurement overstates steady-state kernel time.
    """
    x_device = cuda.to_device(x)
    y_device = cuda.to_device(y)
    # Match the input dtype; cuda.device_array would default to float64,
    # which is inconsistent with the int32 inputs built in main().
    gpu_result = cuda.device_array(n, dtype=x.dtype)
    threads_per_block = 1024
    blocks_per_grid = math.ceil(n / threads_per_block)
    start = time()
    gpu_add[blocks_per_grid, threads_per_block](x_device, y_device, gpu_result, n)
    cuda.synchronize()  # wait for the async kernel before reading the clock
    print("gpu vector add time " + str(time() - start))
    return gpu_result.copy_to_host()

def cpu(x, y, n):
    """Add vectors x and y with NumPy, print the elapsed time, return the sum.

    Fixes two defects in the original: an np.empty(n) buffer was allocated
    and immediately thrown away (np.add rebinds the name), and the result
    was discarded, making verification against the GPU path impossible.

    n is unused but kept so the signature stays parallel to gpu().
    """
    start = time()
    cpu_result = np.add(x, y)
    print("cpu vector add time " + str(time() - start))
    return cpu_result


def main():
    """Benchmark (simulated) GPU vs CPU vector addition on n int32 elements."""
    n = 20000000
    x = np.arange(n).astype(np.int32)
    y = 2 * x
    gpu(x, y, n)
    cpu(x, y, n)
    # Removed the commented-out verification: it referenced cpu_result /
    # gpu_result, names that were never bound in this scope, so enabling
    # it as written would have raised NameError.

if __name__ == "__main__":
    main()
